library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggridges)
library(patchwork)
# Pull daily precipitation and min/max temperature for three NOAA stations
# (2021-01-01 through 2022-12-31), attach a readable station name, and
# rescale the temperature columns.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USW00022534", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2021-01-01",
    date_max = "2022-12-31"
  ) |>
  mutate(
    # case_match() replaces the superseded recode() and matches the style used
    # for pup_data; `.default = id` keeps recode()'s behaviour of passing any
    # unmatched station id through unchanged.
    name = case_match(
      id,
      "USW00094728" ~ "CentralPark_NY",
      "USW00022534" ~ "Molokai_HI",
      "USS0023B17S" ~ "Waterhole_WA",
      .default = id
    ),
    # Raw values are presumably tenths of a degree C (plots label them "C")
    # -- TODO confirm against the GHCN-D documentation.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) |>
  # Put the identifying columns first, keep everything else in place.
  select(name, id, everything())
## using cached file: /Users/jasminezhang/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2023-09-28 23:45:59.687583 (8.525)
## file min/max dates: 1869-01-01 / 2023-09-30
## using cached file: /Users/jasminezhang/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00022534.dly
## date created (size, mb): 2023-09-28 23:46:05.477127 (3.83)
## file min/max dates: 1949-10-01 / 2023-09-30
## using cached file: /Users/jasminezhang/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2023-09-28 23:46:07.516607 (0.994)
## file min/max dates: 1999-09-01 / 2023-09-30

This results in a data frame with 2190 observations of six variables.

Basic plot

# Scatterplot of daily max vs. min temperature, coloured by station, with
# explicit title, axis labels, legend title and caption.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  labs(
    x = "Min daily temp in C",
    y = "Max daily temp in C",
    color = "location", # legend title
    title = "Temp plot",
    caption = "Max vs. Min daily temp in three locations"
  )
## Warning: Removed 17 rows containing missing values (`geom_point()`).

# Various scales: modify the default mapping between variable and aesthetics

# Same scatterplot, now with hand-picked x-axis breaks/labels and a y axis
# moved to the right-hand side and restricted to [0, 30].
ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  labs(
    x = "Min daily temp in C",
    y = "Max daily temp in C",
    color = "location", # legend title
    title = "Temp plot",
    caption = "Max vs. Min daily temp in three locations"
  ) +
  scale_x_continuous(
    breaks = c(-15, 0, 15),
    labels = c("-15 C", "0", "15 C")
  ) +
  scale_y_continuous(
    position = "right",
    # trans = "sqrt" would transform the variable shown on this axis
    # points whose tmax falls outside these limits are dropped from the plot
    limits = c(0, 30)
  )
## Warning: Removed 302 rows containing missing values (`geom_point()`).

## color schemes

# Same scatterplot using the viridis palette for the station colours.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  labs(
    x = "Min daily temp in C",
    y = "Max daily temp in C",
    color = "location", # legend title
    title = "Temp plot",
    caption = "Max vs. Min daily temp in three locations"
  ) +
  # discrete = TRUE because `name` is a categorical variable
  viridis::scale_color_viridis(discrete = TRUE)
## Warning: Removed 17 rows containing missing values (`geom_point()`).

# Themes

# Same scatterplot with a black-and-white theme and the legend at the bottom.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  labs(
    x = "Min daily temp in C",
    y = "Max daily temp in C",
    color = "location", # legend title
    title = "Temp plot",
    caption = "Max vs. Min daily temp in three locations"
  ) +
  viridis::scale_color_viridis(discrete = TRUE) +
  # Order matters: theme_bw() is a global reset of the overall look, so any
  # theme() tweak has to come after it.
  theme_bw() +
  theme(legend.position = "bottom") # legend placement
## Warning: Removed 17 rows containing missing values (`geom_point()`).

Data arguments

# Colour is mapped only inside geom_point(), so geom_smooth() does not see it
# and fits a single curve across all stations.
ggplot(weather_df, aes(x = date, y = tmax)) +
  geom_point(aes(color = name)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values (`geom_point()`).

# Colour is mapped in ggplot(), so both layers inherit it and geom_smooth()
# fits one curve per station.
ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Removed 17 rows containing missing values (`geom_point()`).

# Split out one data frame per station of interest.
nyc_weather_df <- filter(weather_df, name == "CentralPark_NY")
molokai_df <- filter(weather_df, name == "Molokai_HI")

# Each layer can draw from its own dataset: Molokai as points (the plot's
# default data), Central Park as a line (data overridden in geom_line()).
molokai_df |>
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = nyc_weather_df)
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Patchwork: plots of different types

# Faceting gives one panel per station, but every panel is the same plot type.
ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point() +
  facet_grid(. ~ name)
## Warning: Removed 17 rows containing missing values (`geom_point()`).

# Goal: different types of plot in different panels.

# Panel 1: max vs. min temperature scatter, legend hidden.
ggp_temp_scatter <-
  ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  theme(legend.position = "none")

# Panel 2: precipitation density per station, restricted to prcp > 25,
# legend hidden.
ggp_prcp_density <-
  weather_df |>
  filter(prcp > 25) |>
  ggplot(aes(x = prcp, fill = name)) +
  geom_density(alpha = .5) +
  theme(legend.position = "none")

# patchwork: `+` places the two panels side by side.
ggp_temp_scatter + ggp_prcp_density
## Warning: Removed 17 rows containing missing values (`geom_point()`).

# Panel 3: max temperature over time with a smooth per station (no
# confidence band); this panel carries the legend, placed at the bottom.
ggp_tmax_date <-
  ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom")

# patchwork: `+` sets panels side by side, `/` stacks the result above the
# time-series panel.
(ggp_temp_scatter + ggp_prcp_density) / ggp_tmax_date
## Warning: Removed 17 rows containing missing values (`geom_point()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Removed 17 rows containing missing values (`geom_point()`).

Data manipulation

# `name` is a character column, so the stations appear on the x axis in
# alphabetical order.
ggplot(weather_df, aes(x = name, y = tmax)) +
  geom_boxplot()
## Warning: Removed 17 rows containing non-finite values (`stat_boxplot()`).

# Turn `name` into a factor with a hand-specified level order, which then
# controls the order of the violins on the x axis.
weather_df |>
  mutate(
    name = forcats::fct_relevel(
      name,
      c("Molokai_HI", "CentralPark_NY", "Waterhole_WA")
    )
  ) |>
  ggplot(aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = .5) +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing non-finite values (`stat_ydensity()`).

# Order the levels of `name` by another variable instead of by hand.
weather_df |>
  mutate(
    # fct_reorder() summarises tmax within each station (its default summary
    # function is the median, not the mean) and sorts the levels by that value.
    name = forcats::fct_reorder(name, tmax)
  ) |>
  ggplot(aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = .5) +
  theme(legend.position = "bottom")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `name = forcats::fct_reorder(name, tmax)`.
## Caused by warning:
## ! `fct_reorder()` removing 17 missing values.
## ℹ Use `.na_rm = TRUE` to silence this message.
## ℹ Use `.na_rm = FALSE` to preserve NAs.
## Warning: Removed 17 rows containing non-finite values (`stat_ydensity()`).

# Read the litter-level data, standardise the column names, and split the
# `group` column after its third character into dose and day of treatment.
# The directory is spelled "./data/" to match the pups import; the previous
# "./Data/" only resolved on case-insensitive filesystems.
# NOTE(review): confirm the on-disk directory really is lowercase "data".
litter_data <-
  read_csv("./data/FAS_litters.csv") |>
  janitor::clean_names() |>
  separate(group, into = c("dose", "day_of_tx"), sep = 3)
## Rows: 49 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Group, Litter Number
## dbl (6): GD0 weight, GD18 weight, GD of Birth, Pups born alive, Pups dead @ ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the pup-level data, standardise the column names, and replace the
# numeric sex codes with labels (1 -> "male", 2 -> "female").
pup_data <-
  read_csv("./data/FAS_pups.csv") |>
  janitor::clean_names() |>
  mutate(sex = case_match(sex, 1 ~ "male", 2 ~ "female"))
## Rows: 313 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Litter Number
## dbl (5): Sex, PD ears, PD eyes, PD pivot, PD walk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach litter-level information to every pup, keyed on litter number.
fas_data <- left_join(pup_data, litter_data, by = "litter_number")

# Long format: one row per pup and outcome, with the pd_* column name in
# `outcome` and its value in `pn_day`.
fas_tidy <-
  fas_data |>
  select(dose, day_of_tx, pd_ears:pd_walk) |>
  pivot_longer(
    pd_ears:pd_walk,
    names_to = "outcome",
    values_to = "pn_day"
  )

# Plot from fas_tidy rather than repeating the select/pivot pipeline.
# Outcomes are ordered by their median pn_day; panels are day of treatment
# (rows) by outcome (columns), with one violin per dose.
fas_tidy |>
  drop_na() |>
  mutate(outcome = forcats::fct_reorder(outcome, pn_day, median)) |>
  ggplot(aes(x = dose, y = pn_day)) +
  geom_violin() +
  facet_grid(day_of_tx ~ outcome)